Set up

Loading packages

I love the pacman package! It loads packages and installs any that are missing, so you rarely need to call library() or install.packages() yourself.

The introdataviz package can help with raincloud plots. Uncomment it to install it once, then re-comment.

# Show the code of every chunk in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

# p_load() attaches each package, installing it first if it is missing.
pacman::p_load(
  here,         # keeps paths short by starting them from the project
  dplyr,        # data wrangling
  ggplot2,      # the plotting package this tutorial is about
  ggpubr,       # joins figures up into combined panels
  ggh4x,        # special functions for facetting
  RColorBrewer, # colour palettes
  viridis,      # more colour palettes
  stringr,      # finding / manipulating strings
  tidyr         # reshaping and wrangling
)

# introdataviz supplies the flat-violin geom used for the raincloud plot.
# Install it from GitHub only when it is not available yet, so that knitting
# or re-running the document does not contact GitHub every time.
if (!requireNamespace("introdataviz", quietly = TRUE)) {
  devtools::install_github("psyteachr/introdataviz")
}
## Using GitHub PAT from the git credential store.
## Skipping install of 'introdataviz' from a github remote, the SHA1 (0519c980) has not changed since last install.
##   Use `force = TRUE` to force installation

A theme is the canvas behind the data in a figure. ggplot2 has many basic themes to choose from: basic themes. They are all easily customisable, and customisation is your friend! Let’s set one here, and come back and play with custom themes once we have made some basic figures.

# Ryssa's go-to theme: theme_light() with bold titles, black axis and strip
# text, and no grid lines. theme_set() makes it the default for later plots.
theme_set(
  theme_light() +
    theme(
      panel.grid   = element_blank(),
      plot.title   = element_text(face = "bold", size = rel(1.2)),
      axis.title   = element_text(face = "bold", size = rel(1)),
      axis.text    = element_text(colour = "black", size = rel(1)),
      strip.text   = element_text(colour = "black", face = "bold",
                                  size = rel(1)),
      legend.title = element_text(face = "bold", size = rel(1)),
      legend.text  = element_text(size = rel(1))
    )
)

Data sets

Personality traits

We had 322 people fill in questionnaires about themselves then try to estimate levels of movement synchrony in videos.

Moffat et al. (2024). Evaluations of dyadic synchrony: observers’ traits influence estimation and enjoyment of synchrony in mirror-game movements.

This data frame includes the following columns:

  • ID participant identifiers
  • age participant age
  • extraversion_z z-scored extraversion scores
  • selfEsteem_z z-scored self-esteem scores
  • bpq_z z-scored body perception scores (attention paid to internal bodily functions)
  • bcq_z z-scored body competence scores (belief in body’s ability to complete physical tasks)
  • iri_z z-scored empathy scores
  • cati_z z-scored autism trait scores
# Read the questionnaire data; here::here() resolves the path from the
# project root.
personality_traits <- read.csv(here::here("data/personality_traits.csv")) %>%
  # Drop column X (presumably the row names stored when the CSV was written).
  # any_of() keeps this step from erroring if the file has no such column.
  select(-any_of("X"))

# The _z suffix on each personality measure marks z-scored values
head(personality_traits)
##      ID age extraversion_z selfEsteem_z      bpq_z      bcq_z      iri_z
## 1 70114  19      1.6646044   2.13444417 -0.4376815  0.5662393 -0.5431865
## 2 69734  23     -1.5962480  -0.02007314  0.8789409  0.2423866  0.3126705
## 3 69955  23      1.5287356  -0.88188007  0.2206297 -0.7291714  0.7016965
## 4 69833  21     -0.9169038   2.13444417  0.1657704  0.8900920 -2.6439265
## 5 69996  18     -0.5092972  -1.09733180 -1.4800076 -0.4053187  2.0243846
## 6 69837  20     -0.1016907   0.62628205 -1.0411335  0.2423866 -1.2434332
##        cati_z
## 1 -0.02183085
## 2  0.22481943
## 3 -0.02183085
## 4  0.47146971
## 5  0.22481943
## 6 -1.87170796

1. Aesthetics + colors

Every figure starts with the axes and some grouping information. We put these into the aes() function.

# Two equivalent ways to start the same plot.

# 1) Hand the data frame to ggplot() directly. With only aes() mapped, the
#    result is an empty canvas: axes, but no data drawn yet.
ggplot(personality_traits, mapping = aes(x = extraversion_z, y = age))

#    Adding a geom layer draws the data.
ggplot(personality_traits, mapping = aes(x = extraversion_z, y = age)) +
  geom_point()

# 2) Start from the data frame and pipe (%>%) it into ggplot().
personality_traits %>%
  ggplot(mapping = aes(x = extraversion_z, y = age)) +
  geom_point(colour = "blue") # try "red", "cornflowerblue", "maroon"

Colors

Many colors can be called by name: 627 ggplot color names.

They can also be called using hexcodes: html color picker.

Super useful for matching institutional colors or a poster background: you can pick colors straight from images/screenshots with the html color matcher.

For discrete data (examples below), pre-made color palettes save a lot of time: Color palettes.

# Colours can also be given as hex codes; try the R-Ladies purple "#89398a"
personality_traits %>%
  ggplot(mapping = aes(x = extraversion_z, y = age)) +
  geom_point(size = 3, colour = "#dd4e2c")

2. Geoms

Geoms refer to the type of plot (e.g., scatter plot, boxplot, barplot, heatmap, etc.). Each field seems to have its favourites, so here are the ones I use most. When I need inspiration, I peruse 50 best plots with code and look for a geom that’s suitable for my data.

In the plots below, try playing with the parameters inside the different geom_x() functions.

Continuous data

# Distribution as a density curve (map only x OR y)
# try changing fill, color, linewidth, alpha (0-1)
personality_traits %>%
  ggplot(mapping = aes(x = extraversion_z)) +
  geom_density(
    fill = "lightblue",
    colour = "orange",
    linewidth = 4,
    alpha = 1
  )

# Distribution as a histogram (map only x OR y)
# try changing fill, color, binwidth (in the same units as the data)
personality_traits %>%
  ggplot(mapping = aes(x = extraversion_z)) +
  geom_histogram(
    fill = "lightblue",
    colour = "purple",
    binwidth = 0.2
  )

# Scatter plot drawn with jittered points
# try changing color, shape, and size
personality_traits %>%
  ggplot(mapping = aes(x = extraversion_z, y = age)) +
  geom_jitter(
    shape = 21,
    size = 3,
    colour = "purple",
    fill = "yellow"
  )

# Scatter plot with a trend line on top
# try changing color, fill, method (loess, lm)
personality_traits %>%
  ggplot(mapping = aes(x = extraversion_z, y = age)) +
  geom_jitter(
    shape = 25,
    size = 4,
    colour = "purple",
    fill = "salmon"
  ) +
  geom_smooth(
    method = "loess",
    colour = "maroon",
    fill = "darkgreen"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Same scatter plot with a vertical and a horizontal reference line.
# Try changing linetype (dashed, dotted, solid). Layers are drawn in the order
# they are added, so also try moving the two line geoms above the jitter geom
# (directly below the ggplot() line).
personality_traits %>%
  ggplot(mapping = aes(x = extraversion_z, y = age)) +
  geom_jitter(
    shape = 25,
    size = 4,
    colour = "purple",
    fill = "salmon"
  ) +
  geom_vline(
    xintercept = 1,
    linetype = "dashed",
    linewidth = 2,
    colour = "maroon"
  ) +
  geom_hline(
    yintercept = 30,
    linetype = "solid",
    linewidth = 3,
    colour = "orange"
  )

Discrete

First, let’s make age into a categorical variable.

# Make a categorical age variable with three bands.
# case_when() uses the first condition that matches, so each later branch only
# needs its upper bound. The first cut-off is 25 (not 26) so that 25-year-olds
# land in "25-39", matching the labels. A missing age stays NA.
personality_traits1 <- personality_traits %>%
  mutate(
    agegroup = factor(
      case_when(
        age < 25 ~ "<25",
        age < 40 ~ "25-39",
        age >= 40 ~ "40+"
      ),
      # state the order explicitly instead of relying on alphabetical sorting
      levels = c("<25", "25-39", "40+")
    )
  )

levels(personality_traits1$agegroup)
## [1] "<25"   "25-39" "40+"
# Count the rows in each age group; the count is stored in column n
traits_age_n <- tally(group_by(personality_traits1, agegroup))

With discrete variables, we can use “grouping aesthetics”.

# Bar plot of the pre-computed counts; stat = "identity" uses the n column
# as the bar height instead of counting rows.
traits_age_n %>%
  ggplot(mapping = aes(x = agegroup, y = n)) +
  geom_bar(
    stat = "identity",
    fill = "#89398a"
  )

# Boxplots: a grouping variable against a continuous one, filled by group
# with the default ggplot colours.
personality_traits1 %>%
  ggplot(
    mapping = aes(x = agegroup, y = extraversion_z, fill = agegroup)
  ) +
  geom_boxplot()

# Violin plot, filled with a ColorBrewer palette
personality_traits1 %>%
  ggplot(
    mapping = aes(x = agegroup, y = extraversion_z, fill = agegroup)
  ) +
  geom_violin() +
  scale_fill_brewer(palette = "Dark2")

# Multiple overlaid geoms: a zero reference line, then semi-transparent
# violins, then the jittered raw points on top.
personality_traits1 %>%
  ggplot(
    mapping = aes(x = agegroup, y = extraversion_z, fill = agegroup)
  ) +
  geom_hline(
    yintercept = 0,
    colour = "grey",
    linetype = "dashed"
  ) +
  geom_violin(alpha = 0.7) +
  geom_jitter(size = 1, width = 0.1) +
  scale_fill_brewer(palette = "Dark2")

# Multiple geoms side by side: the boxplot is nudged to the left of the
# jittered raw points.
personality_traits1 %>%
  ggplot(
    mapping = aes(x = agegroup, y = extraversion_z, fill = agegroup)
  ) +
  geom_boxplot(
    position = position_nudge(x = -0.3),
    width = 0.3,
    alpha = 0.7
  ) +
  geom_jitter(size = 1, width = 0.1) +
  scale_fill_viridis(discrete = TRUE) # add option="magma"

# Raincloud plot: a flat violin and a narrow boxplot, both nudged to the
# right of the jittered raw points.
personality_traits1 %>%
  ggplot(
    mapping = aes(x = agegroup, y = extraversion_z, fill = agegroup)
  ) +
  introdataviz::geom_flat_violin(
    position = position_nudge(x = 0.4),
    adjust = 0.5,
    alpha = 0.8
  ) +
  geom_boxplot(
    position = position_nudge(x = 0.25),
    width = 0.1,
    alpha = 0.8
  ) +
  geom_point(
    position = position_jitter(width = 0.15),
    shape = 21,
    alpha = 0.8
  ) +
  scale_fill_viridis(discrete = TRUE) # add option="magma"
## Warning: Using the `size` aesthetic with geom_polygon was deprecated in ggplot2 3.4.0.
## ℹ Please use the `linewidth` aesthetic instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

2+ variables

Let’s take two of our personality traits. cati_z is a measure of autism traits and iri_z is a measure of empathy.

# Reshape to long format: the iri_z and cati_z columns are stacked, with the
# column name stored in `measure` and the score in `value`.
personality_traits2 <- personality_traits1 %>%
  select(ID, age, iri_z, cati_z, agegroup) %>%
  pivot_longer(
    cols = c(iri_z, cati_z),
    names_to = "measure",
    values_to = "value"
  )

# Dumbbell plot / joined-up dots: group = ID links the points that share an
# ID with a line; one panel per age group.
personality_traits2 %>%
  ggplot(mapping = aes(x = measure, y = value, group = ID)) +
  geom_point() +
  geom_path() +
  facet_grid(. ~ agegroup)

# Add a column saying whether the difference iri_z - cati_z is positive or
# negative, then reshape to long format as before.
# Both outcomes are spelled out as conditions (no TRUE catch-all), so a
# missing score gives direction = NA instead of being labelled "negative".
# A difference of exactly zero is still grouped with "negative".
personality_traits3 <- personality_traits1 %>%
  mutate(direction = iri_z - cati_z) %>%
  mutate(
    direction = factor(
      case_when(
        direction > 0 ~ "positive",
        direction <= 0 ~ "negative"
      ),
      # fixed level order, so it does not depend on alphabetical sorting
      levels = c("negative", "positive")
    )
  ) %>%
  select(ID, age, cati_z, iri_z, agegroup, direction) %>%
  pivot_longer(
    cols = c(iri_z, cati_z),
    names_to = "measure",
    values_to = "value"
  )

# Dumbbells coloured by the direction of the difference
personality_traits3 %>%
  ggplot(
    mapping = aes(x = measure, y = value, group = ID, colour = direction)
  ) +
  geom_point() +
  geom_path() +
  # choose the colours by hand
  scale_color_manual(values = c("blue", "magenta")) +
  # one panel per age group
  facet_grid(. ~ agegroup)

3. Themes

Back to basic ggplot2 themes and customisation! Play around and change parameters.

ggplot themes

# Try the built-in themes: move the comment marker so that a different
# theme_*() line is the one that is active.
personality_traits3 %>%
  ggplot(
    mapping = aes(x = measure, y = value, group = ID, colour = direction)
  ) +
  geom_point() +
  geom_path() +
  # choose the colours by hand
  scale_color_manual(values = c("blue", "magenta")) +
  # one panel per age group
  facet_grid(. ~ agegroup) +
  # theme_grey() +
  # theme_bw() +
  # theme_light() +
  # theme_dark() +
  theme_classic()

Custom themes

I’ve made these quite extreme so you can get a feeling for the different types of parameters that can be customised. A VERY long list of all the individual theme parts you can change is available: theme components.

Example: Too Much

# "Too Much": a deliberately loud theme that touches backgrounds, text, strips,
# legend and grid so each kind of theme element is easy to spot.
theme_set(
  theme_light() +
    theme(
      # backgrounds
      plot.background   = element_rect(fill = "black"),
      panel.background  = element_rect(fill = "lightblue"),
      strip.background  = element_rect(fill = "magenta"),
      legend.background = element_rect(fill = "lightblue"),
      # text
      plot.title   = element_text(face = "bold", color = "lightblue",
                                  size = rel(2)),
      axis.title   = element_text(face = "bold", color = "lightblue",
                                  size = rel(1)),
      axis.text    = element_text(colour = "magenta", size = rel(1)),
      strip.text   = element_text(colour = "white", face = "bold",
                                  size = rel(1)),
      legend.title = element_text(face = "bold", size = rel(1)),
      legend.text  = element_text(size = rel(1)),
      # grid lines
      panel.grid.major = element_line(color = "white"),
      panel.grid.minor = element_line(color = "white")
    )
)

# Test plot for the theme set above
personality_traits3 %>%
  ggplot(mapping = aes(x = measure, y = value, group = ID)) +
  geom_point() +
  geom_path() +
  labs(
    title = "Palette tester plot",
    x = "Info",
    y = "Value"
  ) +
  facet_grid(. ~ agegroup)

Example: Spooky

# "Spooky": monospace text, white backgrounds, rotated axis text and heavy
# red / orange horizontal grid lines.
theme_set(
  theme_light() +
    theme(
      text = element_text(family = "mono"),
      # backgrounds
      plot.background  = element_rect(fill = "white"),
      panel.background = element_rect(fill = "white"),
      strip.background = element_rect(fill = "white"),
      # text
      plot.title   = element_text(face = "italic", size = rel(2)),
      axis.title   = element_text(face = "bold", color = "black",
                                  size = rel(1)),
      axis.text    = element_text(colour = "black", angle = 45, hjust = 1,
                                  size = rel(1)),
      strip.text   = element_text(colour = "orange", face = "bold",
                                  size = rel(1)),
      legend.title = element_text(face = "bold", size = rel(1)),
      legend.text  = element_text(size = rel(1)),
      # grid lines
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_line(color = "red", linewidth = 3),
      panel.grid.minor   = element_line(color = "orange", linetype = "dashed",
                                        linewidth = 2)
    )
)

# Test plot for the theme set above
personality_traits3 %>%
  ggplot(mapping = aes(x = measure, y = value, group = ID)) +
  geom_point() +
  geom_path() +
  labs(
    title = "Palette tester plot",
    x = "Info",
    y = "Value"
  ) +
  facet_grid(. ~ agegroup)

Example: R Ladies

After this plot, practice saving the figure.

# "R Ladies": the go-to theme plus purple (#89398a) facet strips, panel
# border and dotted major grid lines.
theme_set(
  theme_light() +
    theme(
      # text
      plot.title   = element_text(face = "bold", size = rel(1.2)),
      axis.title   = element_text(face = "bold", size = rel(1)),
      axis.text    = element_text(colour = "black", size = rel(1)),
      strip.text   = element_text(colour = "white", face = "bold",
                                  size = rel(1)),
      legend.title = element_text(face = "bold", size = rel(1)),
      legend.text  = element_text(size = rel(1)),
      # purple accents
      strip.background = element_rect(fill = "#89398a"),
      panel.border     = element_rect(color = "#89398a"),
      # blank out the grid in general, then restyle only the major lines
      panel.grid       = element_blank(),
      panel.grid.major = element_line(color = "#89398a", linetype = "dotted",
                                      linewidth = 0.5)
    )
)


# Dumbbell plot in the current theme. It is stored in a variable so that
# ggsave() can be handed the plot explicitly instead of relying on whichever
# plot happened to be displayed last.
rladies_traits_plot <- personality_traits3 %>%
  ggplot(aes(x = measure, y = value, group = ID, color = direction)) +
  geom_point() + # try out transparency levels
  geom_path() +
  scale_color_manual(values = c("#38A3A5", "#80ED99")) +
  labs(title = "Palette tester plot", x = "Info", y = "Value") +
  facet_grid(. ~ agegroup)

# show the plot
rladies_traits_plot

# Don't forget to save!
# Make sure the output folder exists first: ggsave() can stop with an error
# on a missing folder when the document is knitted (non-interactive session).
dir.create("figures", showWarnings = FALSE)
ggsave(
  "figures/RLadies_traits.jpg",
  plot = rladies_traits_plot,
  height = 3, width = 6, dpi = 400
)

4. Combining plots

# Panel 1: dumbbells coloured by the direction of the difference, one facet
# per age group. Stored rather than printed so it can be arranged later.
differences <- personality_traits3 %>%
  ggplot(
    mapping = aes(x = measure, y = value, group = ID, colour = direction)
  ) +
  geom_point() + # try out transparency levels
  geom_path() +
  scale_color_manual(values = c("#38A3A5", "#80ED99")) +
  labs(
    x = "Info",
    y = "Value"
  ) +
  facet_grid(. ~ agegroup)

# Panel 2: overlapping density curves, one per measure
distributions <- personality_traits3 %>%
  ggplot(mapping = aes(x = value, fill = measure)) +
  geom_density(alpha = 0.6) +
  scale_fill_manual(values = c("#361C35", "#89398a")) +
  labs(
    x = "Value",
    y = "Density"
  )

Arrangement

# Make sure the output folder exists: ggsave() can stop with an error on a
# missing folder when the document is knitted (non-interactive session).
dir.create("figures", showWarnings = FALSE)

# top/bottom
# Each arrangement is stored so the saved file is explicitly the figure shown,
# rather than whichever plot was displayed last.
combo_stacked <- ggarrange(distributions, differences, nrow = 2)
combo_stacked

ggsave(
  "figures/RLadies_combo1.jpg",
  plot = combo_stacked,
  height = 6, width = 6, dpi = 400
)

# side by side (the first panel gets 0.6 of the relative width of the second)
combo_side_by_side <- ggarrange(distributions, differences,
                                widths = c(0.6, 1))
combo_side_by_side

ggsave(
  "figures/RLadies_combo2.jpg",
  plot = combo_side_by_side,
  height = 3, width = 10, dpi = 400
)

Common legends

# Redefine both panels so they are coloured by the same variable (measure)
# with the same two colours, which is what makes a shared legend meaningful.
differences <- personality_traits3 %>%
  ggplot(aes(x = measure, y = value, color = measure)) +
  geom_jitter(width = 0.2) + # try out transparency levels
  scale_color_manual(values = c("#38A3A5", "#80ED99")) +
  labs(x = "Info", y = "Value") +
  facet_grid(. ~ agegroup)

distributions <- personality_traits3 %>%
  ggplot(aes(x = value, fill = measure)) +
  geom_density(alpha = 0.6) +
  scale_fill_manual(values = c("#38A3A5", "#80ED99")) +
  labs(x = "Value", y = "Density")

# top/bottom with a single legend for both panels
# The arrangement is stored so ggsave() receives it explicitly.
combo_common_legend <- ggarrange(distributions, differences,
                                 nrow = 2, common.legend = TRUE)
combo_common_legend

# Make sure the output folder exists: ggsave() can stop with an error on a
# missing folder when the document is knitted (non-interactive session).
dir.create("figures", showWarnings = FALSE)

# dpi = 50 is deliberately low here (see the _50 in the file name)
ggsave(
  "figures/RLadies_combo3_50.jpg",
  plot = combo_common_legend,
  height = 6, width = 6, dpi = 50
)